/**
 * IK 中文分词  版本 5.0
 * IK Analyzer release 5.0
 * 
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * 源代码由林良益(linliangyi2005@gmail.com)提供
 * 版权声明 2012，乌龙茶工作室
 * provided by Linliangyi and copyright 2012 by Oolong studio
 * 
 * 
 */
package org.wltea.analyzer.dic;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collection;
import java.util.List;
import java.util.Locale;

import org.wltea.analyzer.cfg.Configuration;
import org.zhiqim.kernel.logging.Log;
import org.zhiqim.kernel.logging.LogFactory;
import org.zhiqim.kernel.util.Closes;
import org.zhiqim.kernel.util.Resources;

/**
 * Dictionary manager, implemented as a lazily created singleton.
 *
 * <p>One instance owns three dictionaries: the main dictionary (which also
 * receives the configured extension dictionaries), the stop-word dictionary
 * and the quantifier dictionary. All of them are loaded in the constructor.
 * Every word is trimmed and lower-cased before it is stored.
 */
public class Dictionary
{
    private static final Log log = LogFactory.getLog(Dictionary.class);
    
    /**
     * The singleton instance. Declared volatile because {@link #initial} and
     * {@link #getSingleton} read it without holding the class lock; the volatile
     * write publishes the fully loaded dictionaries to those readers.
     */
    private static volatile Dictionary singleton;
    
    /** Configuration that names the dictionary resources. */
    private final Configuration cfg;

    private DictSegment _MainDict;          // main dictionary
    private DictSegment _StopWordDict;      // stop-word dictionary
    private DictSegment _QuantifierDict;    // quantifier dictionary

    /**
     * Loads every dictionary named by the configuration.
     * The extension dictionaries are merged into the main dictionary, so the
     * main dictionary must be loaded first.
     *
     * @param cfg configuration naming the dictionary resources
     */
    private Dictionary(Configuration cfg)
    {
        this.cfg = cfg;
        this.loadMainDict();
        this.loadExtDict();
        this.loadStopWordDict();
        this.loadQuantifierDict();
    }

    /**
     * Creates the singleton on the first call and returns it.
     * Calling this during application start-up moves the cost of loading the
     * dictionaries out of the first segmentation request. Later calls return
     * the existing instance and ignore their argument.
     *
     * @param cfg configuration used to load the dictionaries on the first call
     * @return the singleton instance
     */
    public static Dictionary initial(Configuration cfg)
    {
        // Read the volatile field once into a local; the second check runs under the lock.
        Dictionary instance = singleton;
        if (instance == null)
        {
            synchronized (Dictionary.class)
            {
                instance = singleton;
                if (instance == null)
                {
                    instance = new Dictionary(cfg);
                    singleton = instance;
                }
            }
        }
        return instance;
    }

    /**
     * Returns the singleton instance.
     *
     * @return the singleton instance
     * @throws IllegalStateException if {@link #initial} has not been called yet
     */
    public static Dictionary getSingleton()
    {
        Dictionary instance = singleton;
        if (instance == null)
        {
            throw new IllegalStateException("词典尚未初始化，请先调用initial方法");
        }
        return instance;
    }

    /**
     * Adds a batch of words to the in-memory main dictionary.
     * Null and blank entries are skipped, matching how dictionary files are loaded.
     *
     * @param words words to add; may be null, in which case nothing happens
     */
    public void addWords(Collection<String> words)
    {
        if (words == null)
            return;
        
        for (String word : words)
        {
            if (word == null)
                continue;
            
            String normalized = normalize(word);
            if (normalized.length() > 0)
            {
                _MainDict.fillSegment(normalized.toCharArray());
            }
        }
    }

    /**
     * Disables (masks) a batch of words in the main dictionary.
     * Null and blank entries are skipped.
     *
     * @param words words to disable; may be null, in which case nothing happens
     */
    public void disableWords(Collection<String> words)
    {
        if (words == null)
            return;
        
        for (String word : words)
        {
            if (word == null)
                continue;
            
            String normalized = normalize(word);
            if (normalized.length() > 0)
            {
                _MainDict.disableSegment(normalized.toCharArray());
            }
        }
    }

    /**
     * Matches a whole character array against the main dictionary.
     *
     * @param charArray characters to match
     * @return Hit describing the match result
     */
    public Hit matchInMainDict(char[] charArray)
    {
        return _MainDict.match(charArray);
    }

    /**
     * Matches a slice of a character array against the main dictionary.
     *
     * @param charArray characters to match
     * @param begin     index of the first character of the slice
     * @param length    number of characters in the slice
     * @return Hit describing the match result
     */
    public Hit matchInMainDict(char[] charArray, int begin, int length)
    {
        return _MainDict.match(charArray, begin, length);
    }

    /**
     * Matches a slice of a character array against the quantifier dictionary.
     *
     * @param charArray characters to match
     * @param begin     index of the first character of the slice
     * @param length    number of characters in the slice
     * @return Hit describing the match result
     */
    public Hit matchInQuantifierDict(char[] charArray, int begin, int length)
    {
        return _QuantifierDict.match(charArray, begin, length);
    }

    /**
     * Continues a previous match by one character, starting from the
     * DictSegment stored in the given Hit instead of from the dictionary root.
     *
     * @param charArray    characters being matched
     * @param currentIndex index of the single character to match next
     * @param matchedHit   Hit returned by the previous match step
     * @return Hit describing the extended match result
     */
    public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit)
    {
        DictSegment ds = matchedHit.getMatchedDictSegment();
        return ds.match(charArray, currentIndex, 1, matchedHit);
    }

    /**
     * Tells whether a slice of a character array is a stop word.
     *
     * @param charArray characters to test
     * @param begin     index of the first character of the slice
     * @param length    number of characters in the slice
     * @return true if the slice matches an entry of the stop-word dictionary
     */
    public boolean isStopWord(char[] charArray, int begin, int length)
    {
        return _StopWordDict.match(charArray, begin, length).isMatch();
    }

    /************************************************************************************************/
    // Dictionary file loading (main, extension, stop-word and quantifier dictionaries)
    /************************************************************************************************/
    
    /**
     * Loads the main dictionary file.
     *
     * @throws RuntimeException if the main dictionary resource cannot be found
     */
    private void loadMainDict()
    {
        _MainDict = new DictSegment((char) 0);
        
        InputStream is = Resources.getResourceStream(this.getClass(), cfg.getMainDictionary());
        if (is == null)
            throw new RuntimeException("主词典文件未找到");
        
        load(_MainDict, "主词典", cfg.getMainDictionary(), is);
    }

    /**
     * Loads the user-configured extension dictionaries into the main dictionary.
     * Extension resources that cannot be found are skipped.
     */
    private void loadExtDict()
    {
        List<String> extDictFiles = cfg.getExtDictionarys();
        if (extDictFiles == null)
            return;
        
        for (String extDictName : extDictFiles)
        {
            InputStream is = Resources.getResourceStream(this.getClass(), extDictName);
            if (is == null)
                continue;
            
            load(_MainDict, "扩展主词典", extDictName, is);
        }
    }

    /**
     * Loads the user-configured stop-word dictionaries.
     * The stop-word dictionary starts empty; resources that cannot be found are skipped.
     */
    private void loadStopWordDict()
    {
        _StopWordDict = new DictSegment((char) 0);
        List<String> extStopWordDictFiles = cfg.getExtStopWordDictionarys();
        if (extStopWordDictFiles == null)
            return;
        
        for (String extStopWordDictName : extStopWordDictFiles)
        {
            InputStream is = Resources.getResourceStream(this.getClass(), extStopWordDictName);
            if (is == null)
                continue;
            
            load(_StopWordDict, "扩展停止词典", extStopWordDictName, is);
        }
    }

    /**
     * Loads the quantifier dictionary file.
     *
     * @throws RuntimeException if the quantifier dictionary resource cannot be found
     */
    private void loadQuantifierDict()
    {
        _QuantifierDict = new DictSegment((char) 0);

        InputStream is = Resources.getResourceStream(this.getClass(), cfg.getQuantifierDicionary());
        if (is == null)
            throw new RuntimeException("量词词典文件未找到");
        
        load(_QuantifierDict, "量词词典", cfg.getQuantifierDicionary(), is);
    }

    /**
     * Reads a UTF-8 dictionary stream, one word per line, into a dictionary.
     * Blank lines are skipped. An I/O failure is logged and loading of this
     * stream stops; words read before the failure stay in the dictionary.
     * The stream is always closed before returning.
     *
     * @param dict      dictionary that receives the words
     * @param name      dictionary name, used only in log messages
     * @param path      dictionary path, used only in log messages
     * @param is        input stream to read; closed by this method
     */
    private void load(DictSegment dict, String name, String path, InputStream is)
    {
        try
        {
            BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
            String line;
            while ((line = br.readLine()) != null)
            {
                String word = normalize(line);
                if (word.length() > 0)
                {
                    dict.fillSegment(word.toCharArray());
                }
            }

            log.info("加载[%s][%s]完成", name, path);
        }
        catch (IOException e)
        {
            log.error("加载[%s][%s]异常", name, path, e);
        }
        finally
        {
            // Closing the underlying stream also releases the reader built on top of it.
            Closes.closeIgnoreException(is);
        }
    }

    /**
     * Normalizes a word the same way for loading, adding and disabling:
     * surrounding whitespace is trimmed and the text is lower-cased.
     * Locale.ROOT keeps the result independent of the JVM default locale
     * (for example, a Turkish default locale would map "I" to dotless "ı").
     *
     * @param word raw word, not null
     * @return the trimmed, lower-cased word; empty if the input was blank
     */
    private static String normalize(String word)
    {
        return word.trim().toLowerCase(Locale.ROOT);
    }
}
